In [ ]:
import os
from os import path
# Third-party
from astropy.table import Table
import astropy.units as u
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
import h5py
import pandas as pd
import tqdm
from thejoker import JokerSamples
from twoface.config import TWOFACE_CACHE_PATH
from twoface.samples_analysis import unimodal_P
from twoface.db import (db_connect, AllStar, AllVisit, AllVisitToAllStar,
StarResult, Status, JokerRun, initialize_db)
In [ ]:
plot_path = '../../paper/1-catalog/figures/'
table_path = '../../paper/1-catalog/tables/'
In [ ]:
Session, _ = db_connect(path.join(TWOFACE_CACHE_PATH, 'apogee.sqlite'))
session = Session()
samples_file = path.join(TWOFACE_CACHE_PATH, 'apogee-jitter.hdf5')
control_samples_file = path.join(TWOFACE_CACHE_PATH, 'apogee-jitter-control.hdf5')
In [ ]:
with h5py.File(control_samples_file) as f:
control_K = np.full((len(f.keys()), 256), np.nan)
for i, key in enumerate(f):
n_samples = len(f[key]['K'])
control_K[i, :n_samples] = f[key]['K'][:]
ln_control_K = np.log(control_K)
In [ ]:
n_samples = np.sum(np.logical_not(np.isnan(control_K)), axis=1)
plt.hist(n_samples, bins=np.linspace(0, 256, 64));
plt.yscale('log')
plt.xlabel('$N$ samples returned')
In [ ]:
needs_mcmc = 0
needs_more_prior = 0
with h5py.File(control_samples_file) as f:
keys = list(f.keys())
for k in tqdm.tqdm(np.where(n_samples < 256)[0]):
key = keys[k]
data = AllStar.get_apogee_id(session, key).apogeervdata()
samples = JokerSamples.from_hdf5(f[key])
uni = unimodal_P(samples, data)
if uni:
needs_mcmc += 1
else:
needs_more_prior += 1
In [ ]:
needs_mcmc, needs_more_prior
In [ ]:
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
ax = axes[0]
for perc in [1, 5, 15]:
control_perc = np.nanpercentile(ln_control_K, perc, axis=1)
ax.hist(control_perc, bins=np.linspace(-12, 10, 64),
alpha=0.5, label='{0} percentile'.format(perc));
ax = axes[1]
for perc in [85, 95, 99]:
control_perc = np.nanpercentile(ln_control_K, perc, axis=1)
ax.hist(control_perc, bins=np.linspace(-12, 10, 64),
alpha=0.5, label='{0} percentile'.format(perc));
for ax in axes:
ax.legend(loc='best', fontsize=14)
ax.set_xlabel(r'$\ln \left(\frac{K}{{\rm km}\,{s}^{-1}} \right)$')
ax.set_yscale('log')
axes[0].set_title('control sample')
fig.tight_layout()
In [ ]:
# cut = -0.88 # 5% FPR
cut = -0.12 # 1% FPR
np.sum(np.nanpercentile(ln_control_K, 1., axis=1) > cut) / control_K.shape[0]
In [ ]:
df = pd.read_hdf('../../cache/apogee-jitter-tbl.hdf5')
grouped = df.groupby('APOGEE_ID')
df.columns
In [ ]:
K_per = grouped.agg(lambda x: np.percentile(np.log(x['K']), 1))['K']
(K_per > cut).sum()
In [ ]:
high_K_tbl = Table()
high_K_tbl['APOGEE_ID'] = np.asarray(K_per.index).astype('U20')
high_K_tbl['lnK_per_1'] = np.asarray(K_per)
high_K_tbl.write(path.join(table_path, 'lnK-percentiles.fits'), overwrite=True)
In [ ]:
high_K_tbl[:8].write(path.join(table_path, 'lnK-percentiles.tex'), overwrite=True)
In [ ]:
high_K = np.asarray(K_per[K_per > cut].index).astype('U20')
len(high_K)
In [ ]:
for apogee_id in tqdm.tqdm(high_K):
star = AllStar.get_apogee_id(session, apogee_id)
res = star.results[0] # only one result...
res.high_K = True
session.commit()
In [ ]:
_N = session.query(AllStar).join(StarResult).filter(StarResult.high_K).count()
print(_N)
assert _N == len(high_K)
In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(6, 5))
control_perc = np.nanpercentile(ln_control_K, 1, axis=1)
bins = np.linspace(-12, 10, 64)
ax.hist(K_per, bins=bins,
alpha=1, label='APOGEE sample', normed=True,
histtype='stepfilled', rasterized=True)
ax.hist(control_perc, bins=bins,
alpha=1, label='Control sample', normed=True,
histtype='step', linewidth=2, color='#333333')
ax.legend(loc='best', fontsize=13)
ax.set_yscale('log')
ax.set_xlabel(r'$\ln \left(\frac{K}{{\rm km}\,{s}^{-1}} \right)$')
ax.set_ylabel('density')
ax.axvline(cut, linestyle='--', zorder=10, alpha=1., color='tab:orange')
fig.tight_layout()
fig.savefig(path.join(plot_path, 'lnK-percentiles.pdf'), dpi=250)
In [ ]: